---
title: "Fit KP degree model to many surveys"
output: html_notebook
---


```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r setglobal, tidy=FALSE, echo=FALSE}
library(knitr)

opts_chunk$set(out.width=600, out.height=600)
library(tidyverse)
library(stringr)

options(mc.cores = parallel::detectCores())
library(rstan)
rstan_options(auto_write = TRUE)

library(tidybayes)

library(tictoc)
library(here)
```

```{r dirs}
root.dir <- here()

code.dir <- file.path(root.dir, 'code')

data.out.dir <- file.path(root.dir, "data", "sim")
out.dir <- file.path(root.dir, "out", "sim")

model.out.dir <- file.path(out.dir, 'rdknownprobe')
```

```{r}
# load nr.censuses
nr.censuses <- readRDS(file=file.path(data.out.dir, "fb100_censuses_perfect.rds"))
# load num.cand.gps
num.cand.gps <- readRDS(file=file.path(data.out.dir, "fb100_svy_num_cand_gps.rds"))
# load simulated survey results
svy.sim.out <- readRDS(file=file.path(data.out.dir, "fb100_svy_sims.rds"))
```

Load model utilities

```{r}
source(file.path(code.dir, '90_model_utils.R'))
```


## Now fit the model to many surveys

```{r compile-model}
###########
## model_random_degree_estprobe estimates probe gp sizes
rdestprobehp_model <- 
  stan_model(file = file.path(code.dir, "91_model_random_degree_knownprobe_hidden.stan"))
```

NB: one survey takes ~ 22 sec
10 takes about 223 sec
10,000 (this next chunk) take about 6.3 hours

```{r}
cur.schools <- unique(svy.sim.out$school)
cur.gp.set.idxes <- c(1)
cur.svy.idxes <- 1:10

tic("Fitting model to surveys")
svy_models <- svy.sim.out %>%
              filter(school %in% cur.schools,
                     gp.set.idx %in% cur.gp.set.idxes,
                     svy.index %in% cur.svy.idxes) %>%
              pmap(fit_stan_model_to_survey, 
                   model=rdestprobehp_model,
                   model_out_dir=model.out.dir,
                   estimate_probegp_size=FALSE,
                   # don't print iteration updates
                   refresh=0,
                   iter=2000,
                   warmup=1000)

saveRDS(svy_models,
     file=file.path(model.out.dir,  "svy_models_10perschool.rds"))
#svy_models <- readRDS(file=file.path(model.out.dir,  "svy_models_10perschool.rds"))

toc()
```


```{r}
svy_models_info <- imap_dfr(svy_models,
                            ~tibble(index=.y,
                                    school=.x$school,
                                    gp.set.idx=.x$gp.set.idx,
                                    svy.index=.x$svy.index))

saveRDS(svy_models_info,
     file=file.path(model.out.dir,  "svy_models_info_10perschool.rds"))
#svy_models_info <- readRDS(file=file.path(model.out.dir,  "svy_models_info_10perschool.rds"))

```

```{r}
svy_models_degree_post <- 
  imap_dfr(svy_models,
           ~tibble(index=.y,
                   school=.x$school,
                   gp.set.idx = .x$gp.set.idx,
                   svy.index = .x$svy.index,
                   post_mean_degree_median = .x$post_mean_degree_median$mean_degree,
                   post_mean_degree_lower = .x$post_mean_degree_median$.lower,
                   post_mean_degree_upper = .x$post_mean_degree_median$.upper))

saveRDS(svy_models_degree_post,
     file=file.path(model.out.dir,  "svy_models_degree_post_10perschool.rds"))
#svy_models_degree_post <- readRDS(file=file.path(model.out.dir,  "svy_models_degree_post_10perschool.rds"))

```


## Unpack and compare results

R-hats
------

```{r}
post_rhats <- map_dfr(svy_models,
                      ~ .x$fit_summ$summary %>% as_tibble(rownames='param') %>%
                        select(param, Rhat, everything()) %>%
                        mutate(school = .x$school,
                               gp.set.idx = .x$gp.set.idx,
                               svy.index = .x$svy.index))

saveRDS(post_rhats,
        file=file.path(model.out.dir,  "svy_models_rhat_10perschool.rds"))

post_rhats
```

```{r}
summary(post_rhats$Rhat)
```


Average degrees
---------------

NB: the mean of a log-normal distribution w/ params mu and sigma
    is exp(mu + sigma2/2)
    (this is calculated as part of the model)

Join true degrees onto posterior degree estimates

```{r}
post_mean_degrees <- svy_models_degree_post %>%
  left_join(svy.sim.out %>%
              select(school, gp.set.idx, svy.index,
                     estimand.dbar, true.dbar, kp.dbar.hat, dbar.hat),
            by=c('school', 'gp.set.idx', 'svy.index')) 
post_mean_degrees
```



```{r}
cur.lim <- 1 

pmd_plot <- ggplot(post_mean_degrees) +
  geom_point(aes(x=(dbar.hat-true.dbar)/true.dbar, 
                 y=(post_mean_degree_median-true.dbar)/true.dbar),
                 alpha=.3) +
  geom_abline(intercept=0, slope=1, color='blue') +
  ylim(-cur.lim, cur.lim) + xlim(-cur.lim, cur.lim) +
  xlab(str_wrap("Relative error in new design-based estimate", 30)) +
  ylab(str_wrap("Relative error in kp model-based posterior median estimate", 30)) +
  theme_bw() +
  coord_equal() +
  NULL

ggsave(filename=file.path(model.out.dir, 'sim_modelkp_design_meandegree_est.pdf'),
       plot=pmd_plot)
ggsave(filename=file.path(model.out.dir, 'sim_modelkp_design_meandegree_est.png'),
       plot=pmd_plot)

pmd_plot
```


